# Packages used in this analysis

library("twitteR")
## Warning: package 'twitteR' was built under R version 4.2.2
library(tm)
## Warning: package 'tm' was built under R version 4.2.2
## Loading required package: NLP
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:twitteR':
## 
##     id, location
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library("plotly")
## Warning: package 'plotly' was built under R version 4.2.2
## Loading required package: ggplot2
## 
## Attaching package: 'ggplot2'
## The following object is masked from 'package:NLP':
## 
##     annotate
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(ggplot2)
library(RColorBrewer)
library(tidytext)
## Warning: package 'tidytext' was built under R version 4.2.2
library(stringr)
library(tidyr)
## Warning: package 'tidyr' was built under R version 4.2.2
library(rtweet)
## Warning: package 'rtweet' was built under R version 4.2.2
## 
## Attaching package: 'rtweet'
## The following object is masked from 'package:twitteR':
## 
##     lookup_statuses
library(corpus)
## Warning: package 'corpus' was built under R version 4.2.2
library(magrittr)
## 
## Attaching package: 'magrittr'
## The following object is masked from 'package:tidyr':
## 
##     extract
library(wordcloud)
## Warning: package 'wordcloud' was built under R version 4.2.2
library(wordcloud2)
## Warning: package 'wordcloud2' was built under R version 4.2.2
library(syuzhet)
## Warning: package 'syuzhet' was built under R version 4.2.2
## 
## Attaching package: 'syuzhet'
## The following object is masked from 'package:rtweet':
## 
##     get_tokens

# Set the working directory; the relative file name passed to save()/load()
# further down ("TrendTweetsDF.Rdata") is resolved against it.
# NOTE(review): this absolute path is machine-specific, so setwd() will fail
# on any computer that lacks this folder. Consider an RStudio project or
# relative paths instead.

setwd("C:/CS101_DATA_SCIENCE/Nalaza_Repo/Individual Project/Individual Project 2")

# Authenticate against the Twitter API with your developer credentials.
#
# The four credentials are read from environment variables (for example set
# in ~/.Renviron) instead of being written into the script, so secrets are
# never stored in the source file or in version control.
# SECURITY: the keys and tokens that used to be hard-coded here have been
# exposed and should be revoked and regenerated in the developer portal.

CONSUMER_SECRET <- Sys.getenv("TWITTER_CONSUMER_SECRET")
CONSUMER_KEY <- Sys.getenv("TWITTER_CONSUMER_KEY")
ACCESS_SECRET <- Sys.getenv("TWITTER_ACCESS_SECRET")
ACCESS_TOKEN <- Sys.getenv("TWITTER_ACCESS_TOKEN")

# Sys.getenv() returns "" for unset variables; stop early with a clear
# message rather than sending empty credentials to the API.
twitter_credentials <- c(
  TWITTER_CONSUMER_KEY = CONSUMER_KEY,
  TWITTER_CONSUMER_SECRET = CONSUMER_SECRET,
  TWITTER_ACCESS_TOKEN = ACCESS_TOKEN,
  TWITTER_ACCESS_SECRET = ACCESS_SECRET
)
missing_credentials <- names(twitter_credentials)[!nzchar(twitter_credentials)]
if (length(missing_credentials) > 0) {
  stop(
    "Missing Twitter credentials; set these environment variables: ",
    paste(missing_credentials, collapse = ", "),
    call. = FALSE
  )
}

setup_twitter_oauth(consumer_key = CONSUMER_KEY,
                    consumer_secret = CONSUMER_SECRET,
                    access_token = ACCESS_TOKEN,
                    access_secret = ACCESS_SECRET)
## [1] "Using direct authentication"

# Get up to 10000 observations (retweets included) for #TheGameAwards:
# English tweets between 2022-12-01 and 2022-12-08. retryOnRateLimit = 120
# lets the search retry up to 120 times when the API rate limit is hit.
# String arguments must use plain ASCII quotes; typographic quotes
# (as pasted from a word processor) are a parse error in R.

trendTweets <- searchTwitter(
  "#TheGameAwards",
  n = 10000,
  lang = "en",
  since = "2022-12-01",
  until = "2022-12-08",
  retryOnRateLimit = 120
)

# Convert the returned tweets into a data frame

TrendTweetsDF <- twListToDF(trendTweets)

# Save the data frame to the working directory

save(TrendTweetsDF, file = "TrendTweetsDF.Rdata")

# Use the existing saved data

load(file = "TrendTweetsDF.Rdata")

# Subset the retweets and the original tweets into separate data frames.
# Plot each of them as a vertical bar graph and include legends.

# Original tweets ----

# isRetweet is expected to be a logical column (as produced by twListToDF),
# so it is negated directly instead of being compared with the string
# "FALSE", which only worked through implicit type coercion.
Original <- subset(
  TrendTweetsDF,
  !isRetweet,
  select = c(text, screenName, created, isRetweet)
)

# Latest and earliest creation time of the original tweets.
# group_by(1) adds a constant dummy group, shown as column `1` below.
Original %>%
  group_by(1) %>%
  summarise(max = max(created), min = min(created))
## # A tibble: 1 × 3
##     `1` max                 min                
##   <dbl> <dttm>              <dttm>             
## 1     1 2022-12-07 23:59:36 2022-12-07 17:56:49

# Add the creation time rounded to the hour; round() on a date-time does not
# return POSIXct, hence the conversion back with as.POSIXct().
Tweets <- Original %>%
  mutate(Created_At_Round = as.POSIXct(round(created, units = "hours")))

# Earliest creation time among the original tweets
Minimum <- min(pull(Tweets, created))
Minimum
## [1] "2022-12-07 17:56:49 UTC"

# Latest creation time among the original tweets
Maximum <- max(pull(Tweets, created))
Maximum
## [1] "2022-12-07 23:59:36 UTC"

# Plot of the original tweets: histogram of creation times, with the bar
# fill mapped to the number of tweets in each bin (legend on the right).

Original_tweets <- ggplot(Tweets, aes(x = created)) +
  # after_stat(count) replaces the deprecated ..count.. notation.
  # bins = 30 spells out the stat_bin() default, which keeps the same
  # binning and avoids the "Pick better value" message.
  geom_histogram(aes(fill = after_stat(count)), bins = 30) +
  theme(legend.position = "right") +
  xlab("Time") +
  ylab("Number of tweets") +
  scale_fill_gradient(low = "midnightblue", high = "purple") +
  labs(title = "The Original Tweets", subtitle = "December 7,2022")

# Convert the static ggplot into an interactive plotly chart
Original_tweets %>% ggplotly()

# The retweets ----

# isRetweet is expected to be a logical column (as produced by twListToDF),
# so it is used directly as the filter instead of being compared with the
# string "TRUE", which only worked through implicit type coercion.
Retweets <- subset(
  TrendTweetsDF,
  isRetweet,
  select = c(text, screenName, created, isRetweet)
)

# Latest and earliest creation time of the retweets.
# group_by(1) adds a constant dummy group, shown as column `1` below.
Retweets %>%
  group_by(1) %>%
  summarise(max = max(created), min = min(created))
## # A tibble: 1 × 3
##     `1` max                 min                
##   <dbl> <dttm>              <dttm>             
## 1     1 2022-12-07 23:59:53 2022-12-07 17:55:49

# Add the creation time rounded to the hour; round() on a date-time does not
# return POSIXct, hence the conversion back with as.POSIXct().
Retweets_1 <- Retweets %>%
  mutate(Created_At_Round = as.POSIXct(round(created, units = "hours")))

# Earliest creation time among the retweets
mini <- min(pull(Retweets_1, created))
mini
## [1] "2022-12-07 17:55:49 UTC"

# Latest creation time among the retweets
maxi <- max(pull(Retweets_1, created))
maxi
## [1] "2022-12-07 23:59:53 UTC"

# Plot of the retweets: histogram of creation times, with the bar fill
# mapped to the number of retweets in each bin (legend on the right).

Retweets_2 <- ggplot(Retweets_1, aes(x = created)) +
  # after_stat(count) replaces the deprecated ..count.. notation.
  # bins = 30 spells out the stat_bin() default, which keeps the same
  # binning and avoids the "Pick better value" message.
  geom_histogram(aes(fill = after_stat(count)), bins = 30) +
  theme(legend.position = "right") +
  xlab("Time") +
  ylab("Number of Retweets") +
  scale_fill_gradient(low = "midnightblue", high = "skyblue") +
  labs(title = "The Retweets", subtitle = "December 7,2022")

# Convert the static ggplot into an interactive plotly chart
Retweets_2 %>% ggplotly()